"""
Phase 3: Deeper Mechanism Analysis — Pension Fund Home Bias
============================================================
Tests:
  (a) Cointegration (Kao test)
  (b) Bootstrap standard errors
  (c) Placebo / permutation test
  (d) Leave-one-out country analysis
  (e) Regional jackknife
  (f) Z × financial_depth interactions
  (g) Z × income_group interactions
  (h) Eurozone subsample
  (i) Time-varying analysis (rolling windows, decade splits)

Output: output/tables/ (markdown files)
"""

import sys
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
from scipy import stats

warnings.filterwarnings("ignore", category=RuntimeWarning)

PROJECT_DIR = Path("/mnt/c/demographics_capital_flows/pension_home_bias")
ROOT_DIR = PROJECT_DIR.parent
sys.path.insert(0, str(ROOT_DIR / "multilateral" / "src"))
from model import PanelGLS

PROCESSED_DIR = PROJECT_DIR / "data" / "processed"
TABLES_DIR = PROJECT_DIR / "output" / "tables"
TABLES_DIR.mkdir(parents=True, exist_ok=True)

# ── Eurozone members with time-varying membership ──────────────────────
EZ_MEMBERS = {
    'AUT': 1999, 'BEL': 1999, 'FIN': 1999, 'FRA': 1999, 'DEU': 1999,
    'IRL': 1999, 'ITA': 1999, 'LUX': 1999, 'NLD': 1999, 'PRT': 1999,
    'ESP': 1999, 'GRC': 2001, 'SVN': 2007, 'CYP': 2008, 'MLT': 2008,
    'SVK': 2009, 'EST': 2011, 'LVA': 2014, 'LTU': 2015,
}

OECD_38 = [
    "AUS", "AUT", "BEL", "CAN", "CHL", "COL", "CRI", "CZE", "DNK", "EST",
    "FIN", "FRA", "DEU", "GRC", "HUN", "ISL", "IRL", "ISR", "ITA", "JPN",
    "KOR", "LVA", "LTU", "LUX", "MEX", "NLD", "NZL", "NOR", "POL", "PRT",
    "SVK", "SVN", "ESP", "SWE", "CHE", "TUR", "GBR", "USA",
]

# ── Regional classification ───────────────────────────────────────────
REGION_MAP = {
    'Europe': ['AUT','BEL','BGR','HRV','CYP','CZE','DNK','EST','FIN','FRA',
               'DEU','GRC','HUN','ISL','IRL','ITA','LVA','LTU','LUX','MLT',
               'NLD','NOR','POL','PRT','ROU','SVK','SVN','ESP','SWE','CHE',
               'GBR','SRB','MKD','ALB','BIH','MNE','MDA','BLR','UKR','RUS'],
    'North America': ['USA','CAN','MEX'],
    'East Asia & Pacific': ['JPN','KOR','CHN','AUS','NZL','SGP','HKG','TWN',
                            'THA','MYS','IDN','PHL','VNM','MMR','KHM','LAO',
                            'MNG','FJI','PNG','BRN'],
    'South Asia': ['IND','PAK','BGD','LKA','NPL','AFG','BTN','MDV'],
    'Latin America': ['BRA','ARG','CHL','COL','PER','VEN','ECU','BOL','PRY',
                      'URY','CRI','PAN','GTM','HND','SLV','NIC','DOM','HTI',
                      'JAM','TTO','CUB'],
    'Middle East & North Africa': ['SAU','ARE','QAT','KWT','BHR','OMN','ISR',
                                    'JOR','LBN','IRQ','IRN','EGY','MAR','TUN',
                                    'DZA','LBY'],
    'Sub-Saharan Africa': ['NGA','ZAF','KEN','GHA','ETH','TZA','UGA','MOZ',
                           'CMR','CIV','SEN','ZMB','ZWE','AGO','SDN','MLI',
                           'BFA','NER','TCD','MWI','RWA','BDI','TGO','BEN',
                           'SLE','LBR','GIN','GAB','COG','COD','NAM','BWA',
                           'MUS','MDG','SOM','ERI','SWZ','LSO','GMB','GNB',
                           'CPV','COM','STP','SYC','DJI'],
}


def stars(p):
    if p < 0.01: return '***'
    if p < 0.05: return '**'
    if p < 0.10: return '*'
    return ''


def run_model(df, dep_var, regressors, label, silent=False):
    """Run PanelGLS and return results dict."""
    regressors = [r for r in regressors if r in df.columns]
    if dep_var not in df.columns:
        if not silent:
            print(f"  [{label}] {dep_var} missing — skipping")
        return None
    sub = df.dropna(subset=[dep_var] + regressors).copy()
    if len(sub) < 50:
        if not silent:
            print(f"  [{label}] Insufficient obs ({len(sub)}) — skipping")
        return None
    gls = PanelGLS()
    gls.fit(sub[dep_var].values, sub[regressors].values,
            sub['iso3'].values, sub['year'].values)
    if not silent:
        print(f"  [{label}]  N={gls.n_obs}, countries={gls.n_countries}, R²={gls.r_squared:.4f}")
    results = {
        'label': label, 'dep_var': dep_var,
        'n_obs': gls.n_obs, 'n_countries': gls.n_countries,
        'r_squared': gls.r_squared,
    }
    for i, name in enumerate(regressors):
        results[f'coef_{name}'] = gls.beta[i]
        results[f'se_{name}'] = gls.se[i]
        results[f'p_{name}'] = gls.pvalues[i]
        if not silent:
            sig = stars(gls.pvalues[i])
            print(f"    {name:<30} {gls.beta[i]:>10.4f} ({gls.se[i]:.4f}) {sig}")
    return results


def build_table(results, key_vars, notes, filename, title):
    """Write a markdown table to output/tables/."""
    if not results:
        return
    md = [f"# {title}\n"]
    md.append("| Model | Dep Var | N | Countries | R² |")
    md.append("|---|---|---|---|---|")
    for r in results:
        md.append(f"| {r['label']} | {r['dep_var']} | {r['n_obs']:,} "
                  f"| {r['n_countries']} | {r['r_squared']:.3f} |")
    md.append("\n## Key Coefficients\n")
    md.append("| Model | Variable | Coef | SE | p-value | Sig |")
    md.append("|---|---|---|---|---|---|")
    for r in results:
        for var in key_vars:
            ckey = f'coef_{var}'
            if ckey in r:
                p = r[f'p_{var}']
                md.append(f"| {r['label']} | {var} | {r[ckey]:.4f} "
                          f"| {r[f'se_{var}']:.4f} | {p:.4f} | {stars(p)} |")
    md.append(f"\n*{notes}*")
    out = TABLES_DIR / filename
    out.write_text('\n'.join(md))
    print(f"  Saved: {out}")


def assign_region(iso3):
    for region, countries in REGION_MAP.items():
        if iso3 in countries:
            return region
    return 'Other'


# ═══════════════════════════════════════════════════════════════════════
# PART A: COINTEGRATION TESTS (Kao test)
# ═══════════════════════════════════════════════════════════════════════

def kao_cointegration_test(df, dep_var, regressors):
    """
    Kao (1999) panel cointegration test.
    Run ADF on OLS residuals from the panel regression.
    H0: no cointegration (unit root in residuals).
    """
    regressors = [r for r in regressors if r in df.columns]
    sub = df.dropna(subset=[dep_var] + regressors).copy()
    if len(sub) < 100:
        return None

    # OLS residuals
    from numpy.linalg import lstsq
    y = sub[dep_var].values
    X = sub[regressors].values
    X_c = np.column_stack([np.ones(len(y)), X])
    beta, _, _, _ = lstsq(X_c, y, rcond=None)
    resid = y - X_c @ beta

    sub = sub.copy()
    sub['_resid'] = resid
    sub = sub.sort_values(['iso3', 'year'])

    # Panel ADF: pool lagged residual regressions
    adf_t_stats = []
    for iso, grp in sub.groupby('iso3'):
        if len(grp) < 10:
            continue
        e = grp['_resid'].values
        de = np.diff(e)
        e_lag = e[:-1]
        if len(de) < 5:
            continue
        X_adf = np.column_stack([e_lag, np.ones(len(e_lag))])
        try:
            b, _, _, _ = lstsq(X_adf, de, rcond=None)
            resid_adf = de - X_adf @ b
            se = np.sqrt(np.sum(resid_adf**2) / (len(de) - 2) *
                         np.linalg.inv(X_adf.T @ X_adf)[0, 0])
            if se > 0:
                adf_t_stats.append(b[0] / se)
        except Exception:
            continue

    if not adf_t_stats:
        return None

    # Kao test statistic: standardized mean of ADF t-stats
    t_bar = np.mean(adf_t_stats)
    n = len(adf_t_stats)
    # Under H0, t_bar ~ N(-1.50, 1/sqrt(N)) approximately
    # Kao (1999) critical values: mean ~ -1.50, sd ~ 0.65 under null
    kao_stat = (t_bar - (-1.50)) / (0.65 / np.sqrt(n))
    kao_p = stats.norm.sf(kao_stat)  # one-sided: reject if sufficiently negative

    return {
        'n_countries': n,
        't_bar': t_bar,
        'kao_stat': kao_stat,
        'p_value': kao_p,
    }


# ═══════════════════════════════════════════════════════════════════════
# PART B: BOOTSTRAP STANDARD ERRORS
# ═══════════════════════════════════════════════════════════════════════

def bootstrap_se(df, dep_var, regressors, n_boot=500, seed=42):
    """
    Block bootstrap by country. Returns bootstrap SEs and CIs for Z_1.
    """
    regressors = [r for r in regressors if r in df.columns]
    sub = df.dropna(subset=[dep_var] + regressors).copy()
    if len(sub) < 100:
        return None

    countries = sub['iso3'].unique()
    n_countries = len(countries)
    rng = np.random.RandomState(seed)

    boot_coefs = []
    for b in range(n_boot):
        boot_countries = rng.choice(countries, size=n_countries, replace=True)
        frames = []
        for c in boot_countries:
            frames.append(sub[sub['iso3'] == c])
        boot_df = pd.concat(frames, ignore_index=True)

        try:
            gls = PanelGLS()
            gls.fit(boot_df[dep_var].values, boot_df[regressors].values,
                    boot_df['iso3'].values, boot_df['year'].values)
            boot_coefs.append(gls.beta.copy())
        except Exception:
            continue

    if len(boot_coefs) < 100:
        return None

    boot_coefs = np.array(boot_coefs)
    boot_se = np.std(boot_coefs, axis=0)

    result = {'n_boot': len(boot_coefs)}
    for i, name in enumerate(regressors):
        result[f'boot_se_{name}'] = boot_se[i]
        result[f'boot_mean_{name}'] = np.mean(boot_coefs[:, i])
        result[f'boot_ci_lo_{name}'] = np.percentile(boot_coefs[:, i], 2.5)
        result[f'boot_ci_hi_{name}'] = np.percentile(boot_coefs[:, i], 97.5)

    return result


# ═══════════════════════════════════════════════════════════════════════
# PART C: PLACEBO / PERMUTATION TEST
# ═══════════════════════════════════════════════════════════════════════

def placebo_test(df, dep_var, regressors, n_perm=1000, seed=42):
    """
    Permutation test: randomly reassign Z_1 across countries within year
    to test whether the observed Z_1 coefficient is distinguishable from noise.
    """
    regressors = [r for r in regressors if r in df.columns]
    sub = df.dropna(subset=[dep_var] + regressors).copy()
    if len(sub) < 100:
        return None

    # True coefficient
    gls = PanelGLS()
    gls.fit(sub[dep_var].values, sub[regressors].values,
            sub['iso3'].values, sub['year'].values)
    true_z1_idx = regressors.index('Z_1') if 'Z_1' in regressors else 0
    true_coef = gls.beta[true_z1_idx]

    rng = np.random.RandomState(seed)
    perm_coefs = []

    for _ in range(n_perm):
        perm_df = sub.copy()
        # Shuffle Z_1 within each year
        for yr in perm_df['year'].unique():
            mask = perm_df['year'] == yr
            z1_vals = perm_df.loc[mask, 'Z_1'].values.copy()
            rng.shuffle(z1_vals)
            perm_df.loc[mask, 'Z_1'] = z1_vals

        try:
            gls_p = PanelGLS()
            gls_p.fit(perm_df[dep_var].values, perm_df[regressors].values,
                      perm_df['iso3'].values, perm_df['year'].values)
            perm_coefs.append(gls_p.beta[true_z1_idx])
        except Exception:
            continue

    if not perm_coefs:
        return None

    perm_coefs = np.array(perm_coefs)
    # Two-sided p-value
    p_value = np.mean(np.abs(perm_coefs) >= np.abs(true_coef))

    return {
        'true_coef': true_coef,
        'perm_mean': np.mean(perm_coefs),
        'perm_sd': np.std(perm_coefs),
        'perm_p': p_value,
        'n_perm': len(perm_coefs),
        'pct_more_extreme': p_value * 100,
    }


# ═══════════════════════════════════════════════════════════════════════
# MAIN
# ═══════════════════════════════════════════════════════════════════════

def main():
    print("=" * 70)
    print("PHASE 3: Deeper Mechanism Analysis — Pension Fund Home Bias")
    print("=" * 70)

    df = pd.read_csv(PROCESSED_DIR / "pension_panel.csv")
    df = df[df['year'] <= 2024].copy()
    print(f"Panel: {df['iso3'].nunique()} countries, {len(df):,} obs, years {df['year'].min()}-{df['year'].max()}")

    # Assign regions
    df['region'] = df['iso3'].apply(assign_region)

    # Reconstruct financial_depth from available variables
    # Use log(gross_assets_gdp) as a proxy for financial depth/development
    # since the underlying WDI variables are missing
    if df['financial_depth'].sum() == 0:
        print("  financial_depth is zero — reconstructing from kaopen + log_gross_assets")
        # Use kaopen as primary financial openness/depth proxy
        df['financial_depth'] = df['kaopen'].copy()
        # Rebuild interactions
        for zv in ['Z_1', 'Z_2', 'Z_3']:
            df[f'{zv}_x_findepth'] = df[zv] * df['financial_depth']

    # Eurozone dummy (time-varying)
    df['eurozone'] = 0
    for iso, join_year in EZ_MEMBERS.items():
        df.loc[(df['iso3'] == iso) & (df['year'] >= join_year), 'eurozone'] = 1

    demo_vars = ['Z_1', 'Z_2', 'Z_3']
    controls = ['rgdp_growth', 'fiscal_bal_gdp', 'kaopen']
    controls = [c for c in controls if c in df.columns]

    # ═══════════════════════════════════════════════════════════════════
    # PART A: COINTEGRATION TEST
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART A: KAO COINTEGRATION TEST")
    print("=" * 70)

    kao_results = []

    for dep, label in [
        ('gross_assets_gdp', 'Z → gross_assets'),
        ('portfolio_assets_gdp', 'Z → portfolio_assets'),
        ('debt_share_assets', 'Z → debt_share'),
    ]:
        kao = kao_cointegration_test(df, dep, demo_vars + controls)
        if kao:
            print(f"  {label}: t_bar={kao['t_bar']:.3f}, Kao stat={kao['kao_stat']:.3f}, "
                  f"p={kao['p_value']:.4f}, N_countries={kao['n_countries']}")
            kao_results.append({
                'test': label, 'dep_var': dep,
                'n_countries': kao['n_countries'],
                't_bar': kao['t_bar'],
                'kao_stat': kao['kao_stat'],
                'p_value': kao['p_value'],
            })

    if kao_results:
        md = ["# Kao Panel Cointegration Tests\n"]
        md.append("| Test | Dep Var | Countries | t-bar | Kao Stat | p-value | Sig |")
        md.append("|---|---|---|---|---|---|---|")
        for r in kao_results:
            md.append(f"| {r['test']} | {r['dep_var']} | {r['n_countries']} "
                      f"| {r['t_bar']:.3f} | {r['kao_stat']:.3f} | {r['p_value']:.4f} "
                      f"| {stars(r['p_value'])} |")
        md.append("\n*H0: no cointegration (unit root in residuals). Reject if p < 0.05.*")
        (TABLES_DIR / "cointegration.md").write_text('\n'.join(md))
        print(f"  Saved: {TABLES_DIR / 'cointegration.md'}")

    # ═══════════════════════════════════════════════════════════════════
    # PART B: BOOTSTRAP STANDARD ERRORS
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART B: BOOTSTRAP STANDARD ERRORS (500 replications)")
    print("=" * 70)

    boot = bootstrap_se(df, 'gross_assets_gdp', demo_vars + controls, n_boot=500)

    if boot:
        md = ["# Bootstrap Standard Errors (Z → gross_assets_gdp)\n"]
        md.append(f"*Block bootstrap by country, {boot['n_boot']} replications*\n")
        md.append("| Variable | Point Estimate | Analytical SE | Bootstrap SE | 95% CI (lo) | 95% CI (hi) |")
        md.append("|---|---|---|---|---|---|")

        # Get analytical SEs for comparison
        sub = df.dropna(subset=['gross_assets_gdp'] + demo_vars + controls)
        gls = PanelGLS()
        gls.fit(sub['gross_assets_gdp'].values, sub[demo_vars + controls].values,
                sub['iso3'].values, sub['year'].values)

        for i, name in enumerate(demo_vars + controls):
            bse = boot.get(f'boot_se_{name}', np.nan)
            blo = boot.get(f'boot_ci_lo_{name}', np.nan)
            bhi = boot.get(f'boot_ci_hi_{name}', np.nan)
            md.append(f"| {name} | {gls.beta[i]:.4f} | {gls.se[i]:.4f} "
                      f"| {bse:.4f} | {blo:.4f} | {bhi:.4f} |")

        z1_boot_se = boot.get('boot_se_Z_1', np.nan)
        z1_anal_se = gls.se[0]
        if z1_anal_se > 0:
            ratio = z1_boot_se / z1_anal_se
            md.append(f"\n*Bootstrap/Analytical SE ratio for Z_1: {ratio:.2f}*")
            print(f"  Bootstrap/Analytical SE ratio for Z_1: {ratio:.2f}")
            print(f"  Z_1 bootstrap 95% CI: [{boot.get('boot_ci_lo_Z_1', np.nan):.1f}, "
                  f"{boot.get('boot_ci_hi_Z_1', np.nan):.1f}]")

        (TABLES_DIR / "bootstrap_se.md").write_text('\n'.join(md))
        print(f"  Saved: {TABLES_DIR / 'bootstrap_se.md'}")

    # ═══════════════════════════════════════════════════════════════════
    # PART C: PLACEBO / PERMUTATION TEST
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART C: PLACEBO / PERMUTATION TEST (1000 permutations)")
    print("=" * 70)

    placebo = placebo_test(df, 'gross_assets_gdp', demo_vars + controls, n_perm=1000)

    if placebo:
        md = ["# Placebo / Permutation Test (Z_1 → gross_assets_gdp)\n"]
        md.append(f"*Z_1 shuffled within year across countries, {placebo['n_perm']} permutations*\n")
        md.append("| Statistic | Value |")
        md.append("|---|---|")
        md.append(f"| True Z_1 coefficient | {placebo['true_coef']:.4f} |")
        md.append(f"| Permutation mean | {placebo['perm_mean']:.4f} |")
        md.append(f"| Permutation SD | {placebo['perm_sd']:.4f} |")
        md.append(f"| Permutation p-value (two-sided) | {placebo['perm_p']:.4f} |")
        md.append(f"| % more extreme than true | {placebo['pct_more_extreme']:.1f}% |")
        md.append(f"\n*Permutation p-value: {placebo['perm_p']:.4f} "
                  f"({'rejects' if placebo['perm_p'] < 0.05 else 'does not reject'} null of no relationship)*")
        print(f"  True coef: {placebo['true_coef']:.1f}")
        print(f"  Permutation p-value: {placebo['perm_p']:.4f}")
        (TABLES_DIR / "placebo_test.md").write_text('\n'.join(md))
        print(f"  Saved: {TABLES_DIR / 'placebo_test.md'}")

    # ═══════════════════════════════════════════════════════════════════
    # PART D: LEAVE-ONE-OUT COUNTRY ANALYSIS
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART D: LEAVE-ONE-OUT COUNTRY ANALYSIS")
    print("=" * 70)

    sub = df.dropna(subset=['gross_assets_gdp'] + demo_vars + controls).copy()
    countries = sorted(sub['iso3'].unique())

    # Full-sample baseline
    gls_full = PanelGLS()
    gls_full.fit(sub['gross_assets_gdp'].values, sub[demo_vars + controls].values,
                 sub['iso3'].values, sub['year'].values)
    z1_full = gls_full.beta[0]
    print(f"  Full-sample Z_1 = {z1_full:.1f}")

    loo_results = []
    for c in countries:
        loo = sub[sub['iso3'] != c]
        r = run_model(loo, 'gross_assets_gdp', demo_vars + controls,
                      f"LOO: drop {c}", silent=True)
        if r:
            loo_results.append({
                'dropped': c,
                'z1_coef': r['coef_Z_1'],
                'z1_se': r['se_Z_1'],
                'z1_p': r['p_Z_1'],
                'n_obs': r['n_obs'],
                'r_squared': r['r_squared'],
            })

    if loo_results:
        loo_df = pd.DataFrame(loo_results)
        # Find most influential countries
        loo_df['z1_change'] = loo_df['z1_coef'] - z1_full
        loo_df['z1_change_pct'] = (loo_df['z1_change'] / abs(z1_full)) * 100
        loo_df = loo_df.sort_values('z1_change', key=abs, ascending=False)

        top_10 = loo_df.head(10)
        sign_flips = loo_df[np.sign(loo_df['z1_coef']) != np.sign(z1_full)]

        md = ["# Leave-One-Out Country Analysis (Z_1 → gross_assets_gdp)\n"]
        md.append(f"*Full-sample Z_1 = {z1_full:.1f}*\n")

        md.append("## Top 10 Most Influential Countries\n")
        md.append("| Dropped | Z_1 Coef | SE | p-value | Change | % Change |")
        md.append("|---|---|---|---|---|---|")
        for _, row in top_10.iterrows():
            md.append(f"| {row['dropped']} | {row['z1_coef']:.1f} | {row['z1_se']:.1f} "
                      f"| {row['z1_p']:.4f} | {row['z1_change']:.1f} | {row['z1_change_pct']:.1f}% |")

        md.append(f"\n## Stability Summary\n")
        md.append(f"| Statistic | Value |")
        md.append(f"|---|---|")
        md.append(f"| Countries tested | {len(loo_df)} |")
        md.append(f"| Z_1 range | [{loo_df['z1_coef'].min():.1f}, {loo_df['z1_coef'].max():.1f}] |")
        md.append(f"| Z_1 mean (LOO) | {loo_df['z1_coef'].mean():.1f} |")
        md.append(f"| Z_1 SD (LOO) | {loo_df['z1_coef'].std():.1f} |")
        md.append(f"| Sign flips | {len(sign_flips)} |")

        md.append(f"\n*{len(sign_flips)} out of {len(loo_df)} LOO drops flip the sign of Z_1.*")
        print(f"  LOO range: [{loo_df['z1_coef'].min():.1f}, {loo_df['z1_coef'].max():.1f}]")
        print(f"  Sign flips: {len(sign_flips)} / {len(loo_df)}")

        (TABLES_DIR / "leave_one_out.md").write_text('\n'.join(md))
        print(f"  Saved: {TABLES_DIR / 'leave_one_out.md'}")

    # ═══════════════════════════════════════════════════════════════════
    # PART E: REGIONAL JACKKNIFE
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART E: REGIONAL JACKKNIFE")
    print("=" * 70)

    jack_results = []
    for region in sorted(df['region'].unique()):
        if region == 'Other':
            continue
        jack_df = df[df['region'] != region].copy()
        n_dropped = df[df['region'] == region]['iso3'].nunique()
        r = run_model(jack_df, 'gross_assets_gdp', demo_vars + controls,
                      f"Drop {region} ({n_dropped} countries)")
        if r:
            r['region_dropped'] = region
            r['n_dropped'] = n_dropped
            jack_results.append(r)

    if jack_results:
        md = ["# Regional Jackknife (Z_1 → gross_assets_gdp)\n"]
        md.append(f"*Full-sample Z_1 = {z1_full:.1f}*\n")
        md.append("| Region Dropped | Countries Dropped | Z_1 Coef | SE | p-value | Sig | N |")
        md.append("|---|---|---|---|---|---|---|")
        for r in jack_results:
            z1c = r.get('coef_Z_1', np.nan)
            z1s = r.get('se_Z_1', np.nan)
            z1p = r.get('p_Z_1', np.nan)
            md.append(f"| {r['region_dropped']} | {r['n_dropped']} "
                      f"| {z1c:.1f} | {z1s:.1f} | {z1p:.4f} | {stars(z1p)} | {r['n_obs']:,} |")
        md.append(f"\n*Regional jackknife: test whether results are driven by any single region.*")
        (TABLES_DIR / "regional_jackknife.md").write_text('\n'.join(md))
        print(f"  Saved: {TABLES_DIR / 'regional_jackknife.md'}")

    # ═══════════════════════════════════════════════════════════════════
    # PART F: Z × FINANCIAL DEPTH INTERACTIONS
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART F: Z × FINANCIAL DEPTH INTERACTIONS")
    print("=" * 70)

    f_results = []

    # F1: Z + financial_depth → gross_assets
    r = run_model(df, 'gross_assets_gdp',
                  demo_vars + controls + ['financial_depth'],
                  "F1: Z + fin_depth → gross_assets")
    if r: f_results.append(r)

    # F2: Z × financial_depth → gross_assets
    z_fd_int = [f'{zv}_x_findepth' for zv in demo_vars if f'{zv}_x_findepth' in df.columns]
    r = run_model(df, 'gross_assets_gdp',
                  demo_vars + controls + ['financial_depth'] + z_fd_int,
                  "F2: Z×fin_depth → gross_assets")
    if r: f_results.append(r)

    # F3: Above-median vs below-median financial depth
    fd_median = df['financial_depth'].median()
    hi_fd = df[df['financial_depth'] > fd_median].copy()
    lo_fd = df[df['financial_depth'] <= fd_median].copy()

    r = run_model(hi_fd, 'gross_assets_gdp', demo_vars + controls,
                  "F3: High fin_depth Z → gross_assets")
    if r: f_results.append(r)

    r = run_model(lo_fd, 'gross_assets_gdp', demo_vars + controls,
                  "F4: Low fin_depth Z → gross_assets")
    if r: f_results.append(r)

    # F5: Z × financial_depth → portfolio_assets
    r = run_model(df, 'portfolio_assets_gdp',
                  demo_vars + controls + ['financial_depth'] + z_fd_int,
                  "F5: Z×fin_depth → portfolio_assets")
    if r: f_results.append(r)

    key_f = demo_vars + ['financial_depth'] + z_fd_int
    build_table(f_results, key_f,
                "Financial depth proxied by KAOPEN (capital account openness). "
                "Interactions test whether deeper financial markets moderate demographic home bias.",
                "financial_depth_interactions.md",
                "Z × Financial Depth Interactions")

    # ═══════════════════════════════════════════════════════════════════
    # PART G: Z × INCOME GROUP INTERACTIONS
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART G: Z × INCOME GROUP INTERACTIONS")
    print("=" * 70)

    g_results = []

    # Create income group dummies and interactions
    df['inc_mid'] = (df['income_group'] == 'mid').astype(int)
    df['inc_high'] = (df['income_group'] == 'high').astype(int)
    df['Z_1_x_mid'] = df['Z_1'] * df['inc_mid']
    df['Z_1_x_high'] = df['Z_1'] * df['inc_high']

    r = run_model(df, 'gross_assets_gdp',
                  demo_vars + controls + ['inc_mid', 'inc_high',
                                          'Z_1_x_mid', 'Z_1_x_high'],
                  "G1: Z×income → gross_assets")
    if r: g_results.append(r)

    r = run_model(df, 'portfolio_assets_gdp',
                  demo_vars + controls + ['inc_mid', 'inc_high',
                                          'Z_1_x_mid', 'Z_1_x_high'],
                  "G2: Z×income → portfolio_assets")
    if r: g_results.append(r)

    r = run_model(df, 'debt_share_assets',
                  demo_vars + controls + ['inc_mid', 'inc_high',
                                          'Z_1_x_mid', 'Z_1_x_high'],
                  "G3: Z×income → debt_share")
    if r: g_results.append(r)

    key_g = demo_vars + ['inc_mid', 'inc_high', 'Z_1_x_mid', 'Z_1_x_high']
    build_table(g_results, key_g,
                "Reference category: low-income. Interactions test whether "
                "the demographic diversification effect varies by development level.",
                "income_interactions.md",
                "Z × Income Group Interactions")

    # ═══════════════════════════════════════════════════════════════════
    # PART H: EUROZONE SUBSAMPLE
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART H: EUROZONE SUBSAMPLE")
    print("=" * 70)

    h_results = []

    # H1: Eurozone members only (time-varying)
    ez_df = df[df['eurozone'] == 1].copy()
    r = run_model(ez_df, 'gross_assets_gdp', demo_vars + controls,
                  "H1: EZ members → gross_assets")
    if r: h_results.append(r)

    # H2: Eurozone → portfolio assets
    r = run_model(ez_df, 'portfolio_assets_gdp', demo_vars + controls,
                  "H2: EZ members → portfolio_assets")
    if r: h_results.append(r)

    # H3: Eurozone → debt share
    r = run_model(ez_df, 'debt_share_assets', demo_vars + controls,
                  "H3: EZ members → debt_share")
    if r: h_results.append(r)

    # H4: Non-eurozone comparison
    non_ez = df[df['eurozone'] == 0].copy()
    r = run_model(non_ez, 'gross_assets_gdp', demo_vars + controls,
                  "H4: Non-EZ → gross_assets")
    if r: h_results.append(r)

    # H5: Z × eurozone interaction (full sample)
    df['Z_1_x_ez'] = df['Z_1'] * df['eurozone']
    df['Z_2_x_ez'] = df['Z_2'] * df['eurozone']
    r = run_model(df, 'gross_assets_gdp',
                  demo_vars + controls + ['eurozone', 'Z_1_x_ez'],
                  "H5: Z×EZ → gross_assets")
    if r: h_results.append(r)

    # H6: OECD non-eurozone (control for development level)
    oecd_non_ez = df[(df['oecd'] == 1) & (df['eurozone'] == 0)].copy()
    r = run_model(oecd_non_ez, 'gross_assets_gdp', demo_vars + controls,
                  "H6: OECD non-EZ → gross_assets")
    if r: h_results.append(r)

    key_h = demo_vars + ['eurozone', 'Z_1_x_ez']
    build_table(h_results, key_h,
                "Eurozone membership is time-varying (19 members, staggered entry 1999-2015). "
                "Tests whether fixed exchange rates intensify demographic home bias.",
                "eurozone_subsample.md",
                "Eurozone Subsample Analysis")

    # ═══════════════════════════════════════════════════════════════════
    # PART I: TIME-VARYING ANALYSIS
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PART I: TIME-VARYING ANALYSIS")
    print("=" * 70)

    i_results = []

    # Decade splits
    for decade_start, decade_end, label in [
        (1990, 1999, "1990s"),
        (2000, 2009, "2000s"),
        (2010, 2019, "2010s"),
        (2020, 2024, "2020-24"),
    ]:
        dec_df = df[(df['year'] >= decade_start) & (df['year'] <= decade_end)].copy()
        r = run_model(dec_df, 'gross_assets_gdp', demo_vars + controls,
                      f"I-{label}: Z → gross_assets")
        if r: i_results.append(r)

    # Pre/post GFC
    pre_gfc = df[df['year'] < 2008].copy()
    post_gfc = df[df['year'] >= 2008].copy()
    r = run_model(pre_gfc, 'gross_assets_gdp', demo_vars + controls,
                  "I-pre2008: Z → gross_assets")
    if r: i_results.append(r)

    r = run_model(post_gfc, 'gross_assets_gdp', demo_vars + controls,
                  "I-post2008: Z → gross_assets")
    if r: i_results.append(r)

    # Structural break interaction
    df['post_gfc'] = (df['year'] >= 2008).astype(int)
    df['Z_1_x_postgfc'] = df['Z_1'] * df['post_gfc']
    r = run_model(df, 'gross_assets_gdp',
                  demo_vars + controls + ['post_gfc', 'Z_1_x_postgfc'],
                  "I-break: Z×postGFC → gross_assets")
    if r: i_results.append(r)

    # Rolling 10-year windows
    print("\n  Rolling 10-year windows:")
    rolling_results = []
    for start_yr in range(1990, 2016):
        end_yr = start_yr + 9
        roll_df = df[(df['year'] >= start_yr) & (df['year'] <= end_yr)].copy()
        r = run_model(roll_df, 'gross_assets_gdp', demo_vars + controls,
                      f"Roll {start_yr}-{end_yr}", silent=True)
        if r:
            rolling_results.append({
                'window': f"{start_yr}-{end_yr}",
                'z1_coef': r['coef_Z_1'],
                'z1_se': r['se_Z_1'],
                'z1_p': r['p_Z_1'],
                'n_obs': r['n_obs'],
                'n_countries': r['n_countries'],
            })

    key_i = demo_vars + ['post_gfc', 'Z_1_x_postgfc']
    build_table(i_results, key_i,
                "Time-varying analysis: decade splits, pre/post GFC, and structural break interaction.",
                "time_varying.md",
                "Time-Varying Analysis: Demographics → Diversification")

    if rolling_results:
        md = ["# Rolling 10-Year Window Analysis (Z_1 → gross_assets_gdp)\n"]
        md.append("| Window | Z_1 Coef | SE | p-value | Sig | N | Countries |")
        md.append("|---|---|---|---|---|---|---|")
        for r in rolling_results:
            md.append(f"| {r['window']} | {r['z1_coef']:.1f} | {r['z1_se']:.1f} "
                      f"| {r['z1_p']:.4f} | {stars(r['z1_p'])} | {r['n_obs']:,} "
                      f"| {r['n_countries']} |")
        md.append("\n*Rolling 10-year windows. Tracks stability of Z_1 coefficient over time.*")
        (TABLES_DIR / "rolling_windows.md").write_text('\n'.join(md))
        print(f"  Saved: {TABLES_DIR / 'rolling_windows.md'}")

    # ═══════════════════════════════════════════════════════════════════
    # SUMMARY TABLE
    # ═══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 70)
    print("PHASE 3 SUMMARY")
    print("=" * 70)

    tables_written = list(TABLES_DIR.glob("*.md"))
    print(f"  Tables written: {len(tables_written)}")
    for t in sorted(tables_written):
        print(f"    {t.name}")

    print("\n" + "=" * 70)
    print("Phase 3 complete.")
    print("=" * 70)


if __name__ == "__main__":
    main()
